#do experimental analysis with R

install.packages("Gmisc")
install.packages("fdrtool")
install.packages("sjp.likert")
install.packages("easyGgplot2")
install.packages("devtools")
install.packages("ggplot2.histogram")
install_github("easyGgplot2", "kassambara")
install_github("easyGgplot2", "kassambara")
install.packages("interplot")
install.packages("AER")

library(MASS)
library(htmlTable)
library(interplot)
library(Gmisc)
library(fdrtool)
library(grid)
library(forestplot)
library(ggplot2)
library(grid)
library(gridExtra)
library(pastecs)
library(foreign)
require(ggplot2)
library(corrplot)
library(psych)
library(devtools)
library(easyGgplot2)
library(reshape2)
library(RColorBrewer)
library(dplyr)
library(ggthemes)
library(stringr)
library(AER)

#set to local directory
setwd("")

#load data
library(foreign)
data <- read.dta("discr_beliefs.dta")

#prepare data
data$n_procon <- ifelse(data$n_procon == "for equal treatment", 1, 0)
data$conservative <- ifelse(data$conservative == "yes", 1, 0)
data$n_procon2 <- factor(data$n_procon, labels=c("AGAINST", "FOR"))

###################################################
###################################################

#Table 2: Summary Statistics of responses??? quality
summary(data$all_total)
sd(data$all_total)
summary(data$ca_total)
sd(data$ca_total)
summary(data$hdesk_total)
sd(data$hdesk_total)

#################################################
#################################################

# Table 3: Assessing treatment effect in raw form
# t-test
t.test(data$answer ~ data$n_procon) 
t.test(data$hdesk_total ~ data$n_procon)
t.test(data$ca_total ~ data$n_procon)
t.test(data$all_total ~ data$n_procon) 
t.test(data$extra_bin ~ data$n_procon) 
t.test(data$minimal3 ~ data$n_procon) 

###################################################
##################################################

# Prepare Regression analysis
data$hdesk_total_f <- factor(data$hdesk_total)
data$all_total_f <- factor(data$all_total)

# Table 4: 

# model 1:
glm_rel_hdesk3a <- polr(data$hdesk_total_f ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens, 
                        data=data, Hess=TRUE)
coeftest(glm_rel_hdesk3a)
exp(coef(glm_rel_hdesk3a)) # proportional odds ratios displayed in table 2
summary(glm_rel_hdesk3a)
nobs(glm_rel_hdesk3a)
logLik(glm_rel_hdesk3a)

# model 2:
glm_rel_extra <- glm(data$extra_bin ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens, family="binomial", data=data)

summary(glm_rel_extra)
logLik(glm_rel_extra)
nobs(glm_rel_extra)
exp(coef(glm_rel_extra))

# model 3
glm_cons_mini <- glm(data$minimal3 ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens, family="binomial", data=data)
summary(glm_cons_mini)
logLik(glm_cons_mini)
nobs(glm_cons_mini)

# model 4:
glm_cons_hdesk2a <- polr(data$hdesk_total_f ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens, 
                         data=data, Hess=TRUE)

coeftest(glm_cons_hdesk2a)
logLik(glm_cons_hdesk2a)
exp(coef(glm_cons_hdesk2a)) # table 5, model 4
summary(glm_cons_hdesk2a)
nobs(glm_cons_hdesk2a)

# model 5
glm_cons_total2a <- polr(data$all_total_f ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens, 
                         data=data, Hess=TRUE)
coeftest(glm_cons_total2a)
logLik(glm_cons_total2a)
exp(coef(glm_cons_total2a))
summary(glm_cons_total2a)
nobs(glm_cons_total2a)



####################################################
####################################################
# FIGURES
####################################################
####################################################
# Figure 1: based on estimated coefficients and not log odds 
# to better highlight the changing sign of treatment effects

glm_rel_hdesk <- glm(data$hdesk_total ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens, family="poisson", data=data)
summary(glm_rel_hdesk)
logLik(glm_rel_hdesk)
nobs(glm_rel_hdesk)

t99 <- interplot(m=glm_rel_hdesk, var1 = "n_procon", var2="share_catholics", size=1.5, color="black") + 
  xlab("share of catholics")+
  ylab("estimated treatment effect")+
  theme_minimal()+
  theme(text = element_text(size=14))+
  ggtitle("help desk quality")+
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(text = element_text(size=14))+
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 14)) +
  scale_y_continuous(limits = c(-3,3), breaks = c(-3,0,3))
t99

t22 <- interplot(m= glm_rel_extra, var1 = "n_procon", var2 = "share_catholics", size=1.5, color="black") + 
  xlab("share of catholics")+
  ylab("estimated treatment effect")+
  theme_minimal()+
  theme(text = element_text(size=14))+
  ggtitle("extra service")+
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(text = element_text(size=14))+
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  scale_y_continuous(limits = c(-3.2, 3.2), breaks = c(-3,0,3))

tiff("figure1.tiff", height=1500, width=1500, res=300)
grid.arrange(t99, t22, ncol=2)
dev.off()

####################################################
####################################################
# Figure 2: the figure displays the estimated coefficients (log odds) and not 
# the odds ratios reported in table 4 to better highlight the changing sign of treatment effects

glm_cons_total <- glm(data$all_total ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens, family="poisson", data=data)

m66 <- interplot(m= glm_cons_total, var1 = "n_procon", var2 = "conservative", esize=1, size=3, color="black", ercolor="black") + 
  theme_minimal() +
  xlab("conservative mayor")+
  ylab("estimated treatment effect")+
  #theme_bw()+
  theme(text = element_text(size=14))+
  ggtitle("total quality")+
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 14)) + 
  scale_y_continuous(limits = c(-1.1,4.1), breaks = c(-1,0,1,2,3,4))


glm_cons_hdesk <- glm(data$hdesk_total ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens, family="poisson", data=data)
m33 <- interplot(m= glm_cons_hdesk, var1 = "n_procon", var2 = "conservative", esize=1, size=3, color="black", ercolor="black") + 
  xlab("conservative mayor")+
  ylab("estimated treatment effect")+
  theme_minimal()+
  theme(text = element_text(size=14))+
  ggtitle("help desk quality")+
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 14)) + 
  scale_y_continuous(limits = c(-1.1,4.1), breaks = c(-1,0,1,2,3,4))

m33


m10 <- interplot(m= glm_cons_mini, var1 = "n_procon", var2 = "conservative", esize=1, size=3, color="black", ercolor="black") + 
  xlab("conservative mayor")+
  ylab("estimated treatment effect")+
  theme_minimal()+
  theme(text = element_text(size=14))+
  ggtitle("minimalist answers")+
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 14)) + 
  
  scale_y_continuous(limits = c(-1.1,4.1), breaks = c(-1,0,1,2,3,4))

m10

tiff('figure2.tif', height=1500, width=2500, res=300)
grid.arrange(m66, m33, m10, ncol=3)
dev.off()

######################################################
######################################################
# Figure 3: total answer quality by mayor and request

data$conservative2 <- factor(data$conservative, labels=c("not conservative", "conservative"))

tiff("figure3.tif", height=1500, width=1500, res=300 )
ggplot(aes(y=all_total, x=n_procon2, fill=conservative2), data=data) +
  geom_boxplot() +
  facet_wrap(~conservative2) +
  theme_minimal() +
  ylab("total answer quality") +
  xlab("") +    
  scale_y_continuous(limits = c(0,8), breaks = c(0,1,2,3,4,5,6,7,8)) +
  theme(axis.text.x = element_text(size = 14)) +
  theme(axis.title.x = element_text(size = 14)) +
  theme(axis.text.y = element_text(size = 14)) +
  theme(axis.title.y = element_text(size = 14)) +
  theme(legend.position="none") +
  theme(strip.text = element_text(face="bold", size=14)) +
  scale_fill_manual(values=c("grey45", "grey80"))
dev.off()

########################################################
########################################################
#######################################################
#                     APPENDIX
######################################################
######################################################
# Table A1: Balance Testing: description of groups after randomization
table_cath <- describeBy(data$share_catholics, data$n_procon, mat= TRUE)
table_cath
table_cons <- describeBy(data$conservative, data$n_procon, mat= TRUE)
table_cons
table_dens <- describeBy(data$ln_pop_dens, data$n_procon, mat= TRUE)
table_dens

######################################################
######################################################
# Table A2:

#model 1:
glm_rel_hdesk3b <- polr(data$hdesk_total_f ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens
                        + dum2 + dum3 + dum4 + dum5 + dum6 + dum7 + dum8 + dum9 + dum10 + dum11 + dum12 + dum13, 
                        data=data, Hess=TRUE)

coeftest(glm_rel_hdesk3b)
logLik(glm_rel_hdesk3b)
exp(coef(glm_rel_hdesk3b))
summary(glm_rel_hdesk3b)
nobs(glm_rel_hdesk3b)


# model 2:
glm_rel_extra2 <- glm(data$extra_bin ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens
                      + dum2 + dum3 + dum4 + dum5 + dum6 + dum7 + dum8 + dum9 + dum10 + dum11 + dum12 + dum13, 
                      family="binomial", data=data)
summary(glm_rel_extra2)
logLik(glm_rel_extra2)
nobs(glm_rel_extra2)
exp(coef(glm_rel_extra2))

# model 3
glm_cons_mini2 <- glm(data$minimal3 ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens
                      + dum2 + dum3 + dum4 + dum5 + dum6 + dum7 + dum8 + dum9 + dum10 + dum11 + dum12 + dum13, 
                      family="binomial", data=data)
summary(glm_cons_mini2)
logLik(glm_cons_mini2)
nobs(glm_cons_mini2)
exp(coef(glm_cons_mini2))


# model 4:
glm_cons_hdesk2b <- polr(data$hdesk_total_f ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens
                         + dum2 + dum3 + dum4 + dum5 + dum6 + dum7 + dum8 + dum9 + dum10 + dum11 + dum12 + dum13, 
                         data=data, Hess=TRUE)

coeftest(glm_cons_hdesk2b)
logLik(glm_cons_hdesk2b)
exp(coef(glm_cons_hdesk2b))
summary(glm_cons_hdesk2b)
nobs(glm_cons_hdesk2b)


# model 5
glm_cons_total2b <- polr(data$all_total_f ~ n_procon * conservative + n_procon + conservative + share_catholics + ln_pop_dens
                         + dum2 + dum3 + dum4 + dum5 + dum6 + dum7 + dum8 + dum9 + dum10 + dum11 + dum12 + dum13, 
                         data=data, Hess=TRUE)
coeftest(glm_cons_total2b)
logLik(glm_cons_total2b)
exp(coef(glm_cons_total2b))
summary(glm_cons_total2b)
nobs(glm_cons_total2b)


######################################################
######################################################
# Table A3: Exploring Patterns of Response Probability

glm_rel_answer1 <- glm(data$answer ~ n_procon * share_catholics + n_procon + share_catholics + conservative + ln_pop_dens, family="binomial", data=data)
summary(glm_rel_answer1)
logLik(glm_rel_answer1)
nobs(glm_rel_answer1)
exp(coef(glm_rel_answer1))

glm_cons_answer2 <- glm(data$answer ~ n_procon * conservative + n_procon + share_catholics + conservative + ln_pop_dens, family="binomial", data=data)
summary(glm_cons_answer2)
logLik(glm_cons_answer2)
nobs(glm_cons_answer2)
exp(coef(glm_cons_answer2))

###################################################
# Table A4: Assessing potential multicollinearity
###################################################

cor(data$share_catholics, data$conservative)
cor(data$share_catholics, data$ln_pop_dens)
cor(data$conservative,data$ln_pop_dens)


####################################################
